
#####################################################
#
#               Code to replicate 
#  Taxing the 1 percent: Public opinion vs. public policy 
#   
#                MANUSCRIPT 
#
#    Ruben Berge Mathisen (University of Bergen)
#
#       Tables and Figures for Manuscript
#                June 2023
#
#   Original analyses conducted in R version 3.5.1 
#
#####################################################

# Theme 
theme_m <- function(...) {
  theme(
    text = element_text(size = 12,
                        colour = "black"),
    axis.text = element_text(size = 10,
                             colour = "black"),
    axis.title = element_text(size = 11,
                              colour = "black"),
    axis.line = element_line(size=0.3),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(1, "mm"),
    plot.margin = margin(1, 5, 1, 1, "mm"),
    panel.spacing.x = unit(7.5, "mm"),
    panel.spacing.y = unit(2.5, "mm"),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "black", size=0.2,fill=NA),
    panel.grid.major = element_line(color="grey93"),
    strip.background = element_blank(),
    strip.text = element_text(size = 10,
                              face = "bold"),
    ## strip.text.y = element_text(angle = 0, face = "bold.italic"),
    strip.text.y = element_blank(),
    legend.background = element_blank(),
    legend.key = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(size = 10, face = "bold"),
    legend.key.height = unit(4, "mm"),
    legend.title.align = .125,
    ...
  )
}


# Packages 
library(dplyr)
library(ggplot2)
library(haven)
library(foreign)
library(car)
library(readxl)
library(tidyr)
library(gridExtra)
library(lme4)
library(ggeffects)
library(kableExtra)
library(plyr)
library(ggjoy)
library(stargazer)
library(scales)
library(quantreg)
library(ggsci)
library(stringr)
library(readr)

# Colors and shapes 
pal_r <- c(pal_nejm("default")(10)[2],
           pal_nejm("default")(10)[1],
           pal_nejm("default")(10)[4],
           pal_nejm("default")(10)[3],
           "#FED439FF",
           "purple1",
           pal_uchicago("default")(1)[1],
           "skyblue2",
           "palegreen1",
           "lightcoral",
           "midnightblue")
shapes <- c(16,15,17,18,8,7,5,13,2,16,15,17,18,8,7,5,13,2,16,15,17,18,8,7,5,13,2)



################# STUDY 1 ################# 

# National Election Studies harmonization [SEE README FILE FOR HOW TO OBTAIN ACCESS TO THE DATA]
## Load data
Valg1965 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/VAlgundersøkelsen 1965/Valgundersøkelsen 1965.dta")
Valg1977 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1977/Valgundersøkelsen 1977.dta")
Valg1981 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1981/Valgundersøkelsen 1981.dta")
Valg1985 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1985/Valgundersøkelsen 1985.dta")
Valg1989 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1989/Valgundersøkelsen 1989.dta")
Valg1993 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1993/Valgundersøkelsen 1993.dta")
Valg1997 <- read.dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1997/Valgundersøkelsen 1997.dta") # Must use read.dta
Valg2013 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 2013/Valgundersøkelsen 2013.dta")
## NES 1965
Valg1965$tax <- car::recode(Valg1965$v212,"5=1;1:3=0;else=NA")
Valg1965$gender <- car::recode(Valg1965$v264, "1=1; 2=0; else=NA")
Valg1965$age <- Valg1965$v265
Valg1965$edu <- Valg1965$v286
Valg1965$inc <- Valg1965$v291
Valg1965$party <- car::recode(Valg1965$v021, "1:2=1; 3:5=2; 6=3; else=NA")
Valg1965$year <- 1965
Valg1965 <- Valg1965 %>% dplyr::select(year,tax,gender,age,edu,inc,party)
## NES 1977
Valg1977$tax <- car::recode(Valg1977$v061,"1:2=1;3:5=0;else=NA")
Valg1977$gender <- car::recode(Valg1977$v002, "1=1; 2=0; else=NA")
Valg1977$age <- 77-Valg1977$v001
Valg1977$age[Valg1977$age<0] <- NA
Valg1977$edu <- Valg1977$v321
Valg1977$inc <- Valg1977$v320
Valg1977$inc[Valg1977$inc==9] <- NA
Valg1977$party <- car::recode(Valg1977$v258, "16=1; 15=1; 18=1; 10=1; 11=2; 19=2; 14=2; 17=2; 13=3; 12=3; else=NA")
Valg1977$year <- 1977
Valg1977 <- Valg1977 %>% dplyr::select(year,tax,gender,age,edu,inc,party)
## NES 1981
Valg1981$tax <- car::recode(Valg1981$var109,"1:2=1;3:5=0;else=NA")
Valg1981$gender <- car::recode(Valg1981$var013, "1=1; 2=0; else=NA")
Valg1981$age <- 81-Valg1981$var011
Valg1981$edu <- Valg1981$var397
Valg1981$edu[Valg1981$edu==9] <- NA
Valg1981$inc <- Valg1981$var408
Valg1981$inc[Valg1981$inc==8] <- NA
Valg1981$inc[Valg1981$inc==9] <- NA
Valg1981$wealth1 <- car::recode(Valg1981$var403,"1=1; 2:3=0; else=NA")
Valg1981$wealth2 <- car::recode(Valg1981$var404,"1=1; 2:3=0; else=NA")
Valg1981$wealth3 <- car::recode(Valg1981$var405,"1=1; 2:3=0; else=NA")
Valg1981$wealth4 <- car::recode(Valg1981$var406,"1=1; 2:3=0; else=NA")
Valg1981$wealth5 <- car::recode(Valg1981$var407,"1=1; 2:3=0; else=NA")
Valg1981$wealth <- Valg1981$wealth1+Valg1981$wealth2+Valg1981$wealth3+Valg1981$wealth4+Valg1981$wealth5
Valg1981$party <- car::recode(Valg1981$var302, "16=1; 15=1; 18=1; 10=1; 11=2; 19=2; 14=2; 17=2; 13=3; 12=3; else=NA")
Valg1981$year <- 1981
Valg1981 <- Valg1981 %>% dplyr::select(year,tax,gender,age,edu,inc,party)
## NES 1985
Valg1985$tax <- car::recode(Valg1985$V129,"1:2=1;3:5=0;else=NA")
Valg1985$gender <- car::recode(Valg1985$V16, "1=1; 2=0; else=NA")
Valg1985$age <- 85-Valg1985$V14
Valg1985$edu <- Valg1985$V482
Valg1985$edu[Valg1985$edu==9] <- NA
Valg1985$inc <- Valg1985$V486
Valg1985$inc[Valg1985$inc==8] <- NA
Valg1985$inc[Valg1985$inc==9] <- NA
Valg1985$inc[Valg1985$inc==80] <- NA
Valg1985$party <- car::recode(Valg1985$V383, "16=1; 15=1; 18=1; 10=1; 11=2; 19=2; 14=2; 17=2; 13=3; 12=3; else=NA")
Valg1985$year <- 1985
Valg1985 <- Valg1985 %>% dplyr::select(year,tax,gender,age,edu,inc,party)
## NES 1989
Valg1989$tax <- car::recode(Valg1989$V280,"1:2=1;3:5=0;else=NA")
Valg1989$gender <- car::recode(Valg1989$V16, "1=1; 2=0; else=NA")
Valg1989$age <- 89-Valg1989$V14
Valg1989$edu <- Valg1989$V532
Valg1989$inc <- Valg1989$V534
Valg1989$inc[Valg1989$inc==8] <- NA
Valg1989$inc[Valg1989$inc==9] <- NA
Valg1989$party <- car::recode(Valg1989$V331, "1:4=1; 4:7=2; 8:9=3; else=NA")
Valg1989$year <- 1989
Valg1989 <- Valg1989 %>% dplyr::select(year,tax,gender,age,edu,inc,party)
## NES 1993
Valg1993$tax <- car::recode(Valg1993$v177,"1:2=1;3:5=0;else=NA")
Valg1993$gender <- car::recode(Valg1993$v004, "1=1; 2=0; else=NA")
Valg1993$age <- Valg1993$v003
Valg1993$edu <- Valg1993$v298
Valg1993$inc <- Valg1993$v288
Valg1993$party <- car::recode(Valg1993$v198, "1:4=1; 4:7=2; 8:9=3; else=NA")
Valg1993$year <- 1993
Valg1993 <- Valg1993 %>% dplyr::select(year,tax,gender,age,edu,inc,party)
## NES 1997
Valg1997$tax <- car::recode(as.numeric(Valg1997$v032),"1:2=1;3:5=0;else=NA")
Valg1997$gender <- car::recode(as.numeric(Valg1997$v225), "1=1; 2=0; else=NA")
Valg1997$age <- Valg1997$v223
Valg1997$edu <- as.numeric(Valg1997$v226)
Valg1997$inc <- Valg1997$v222
Valg1997$party <- car::recode(as.numeric(Valg1997$v171), "1:4=1; 4:8=2; 9:10=3; else=NA")
Valg1997$year <- 1997
Valg1997 <- Valg1997 %>% dplyr::select(year,tax,gender,age,edu,inc,party)
## Together 
Valg <- rbind.fill(Valg1965,Valg1977,Valg1981,Valg1985,Valg1989,Valg1993,Valg1997)
## Data for analysis
Valg$year <- as.factor(Valg$year)
Valg$party <- factor(Valg$party)
## Transform income and education variables to percentiles
Valg<-Valg %>% 
  dplyr::group_by(factor(year)) %>%
  dplyr::mutate(inc_dec=factor(ntile(inc,10)),
                edu_dec=factor(ntile(edu,10)))

# Overall support for cutting taxes on high incomes
ovl_sup <- Valg %>%
  group_by(year) %>%
  dplyr::summarise(support=round(mean(tax,na.rm=T)*100))

# Bivariate models of support
## Party
m <- lm(tax~party*year,data=Valg)
p <- data.frame(ggemmeans(m,terms=c("year", "party [1,3]")))
p$group <- car::recode(p$group,"1='Left-wing';3='Right-wing'")
## Income
m1 <- lm(tax~inc_dec*year,data=Valg)
p1 <- ggemmeans(m1,terms=c("year","inc_dec"))
p1 <- p1%>%filter(group==1|group==10)
p1$group <- car::recode(p1$group,"1='Low income';10='High income'")
## Education
m1 <- lm(tax~edu_dec*year,data=Valg)
p2 <- ggemmeans(m1,terms=c("year","edu_dec"))
p2 <- p2%>%filter(group==1|group==10)
p2$group <- car::recode(p2$group,"1='Low education';10='High education'")
## Finalize
p_biv <- rbind.fill(p,p1,p2)
names(p_biv)[names(p_biv)=="x"] <- "year"
p_biv$group <- factor(p_biv$group, levels(factor(p_biv$group))[c(2,3,5,4,6,1)])

# Figure 1: The top marginal income tax rate, and popular support for cutting taxes on high incomes, over time.
## Load Piketty data on top marginal tax over time
mtr <- read_excel("Piketty top marginal tax data, Norway.xlsx")
## Merge with public opinion data
ovl_sup$lab_year <- as.numeric(as.character(ovl_sup$year))
d <- merge(mtr,ovl_sup,by="year",all.x=T)
## Plot 
ggplot(d,aes(x=year)) +
  geom_line(aes(y=mtr*100,linetype="Top marginal tax rate")) +
  geom_point(aes(y=mtr*100,shape="Top marginal tax rate"),size=2) +
  scale_y_continuous(labels=function(x) paste0(x,"%"), 
                     limits=c(20,114),breaks=seq(30,80,10)) +
  scale_x_continuous(breaks=seq(1960,2010,5)) +
  coord_cartesian(ylim=c(30,120)) +
  theme_bw() +
  theme(panel.grid = element_blank(), 
        legend.position = "none",
        plot.caption=element_text(margin=margin(t=15),
                                  size=10,
                                  hjust=0,
                                  color="grey30"),
        axis.text = element_text(color="black")) +
  labs(y=NULL,x=NULL,
       linetype=NULL,
       shape=NULL) +
  geom_text(d%>%filter(!is.na(support)),
            map=aes(label=paste0(round(support),"%")),y=97,size=3,alpha=0.8,color=pal_r[1]) +
  geom_segment(aes(lab_year, y = 25, xend = lab_year, yend = 93),
               alpha=0.4,color=pal_r[1]) +
  geom_point(aes(lab_year, y = 25.75),color="grey80") +
  annotate("text",x=1987,y=106,label="Estimated share of citizens who support\ncutting taxes on high incomes, by year:",size=3,alpha=0.8,color=pal_r[1]) +
  annotate("label",x=1967,y=63,label="Top marginal income
           tax rate",size=3,label.size=NA)


# Figure 2: Support among different sectors of the public for cutting taxes on high incomes.
ggplot(p_biv,aes(year,predicted,shape=group,color=group,group=group)) +
  geom_line(size=0.8) +
  geom_point(fill="white",size=2.4) +
  scale_color_manual(values=pal_r) +
  theme_bw() +
  geom_hline(yintercept = 0.5, linetype="dashed",alpha=0.7)+
  theme(panel.grid = element_blank(),
        axis.text = element_text(color="black")) +
  scale_shape_manual(values = c(shapes[1:6])) +
  scale_y_continuous(limits=c(0,1),
                     breaks=seq(0,1,0.1),
                     labels=function(x)paste0(x*100,"%")) +
  labs(x=NULL,y=NULL,shape=NULL,color=NULL) +
  annotate("text",x=4,y=0.87,label="Estimated share of citizens who support\ncutting taxes on high incomes (%)",size=3.5,alpha=0.8)


################# STUDY 2 ################# 

# Aggregated registry data on actual tax rates
reg_data  <- read_excel("Statistics Norway tax rates, for Study 2.xlsx")
reg_data <- reg_data %>% mutate(predicted=rate_nowealth) %>% dplyr::select(amount,predicted)
reg_data$predicted <- reg_data$predicted*100
reg_data$var <- "Average effective tax rates (2018)"
reg_data$conf.high <- NA
reg_data$conf.low <- NA
reg_data$std.error <- NA

# Norwegian Citizen Panel, data prep
d <- read_excel("NCP19 data, for Study 2.xlsx")

# Assigned order of income amounts
d$order_group <- d$r19pad6_ran
# Preferred rate
d <- d %>% mutate(tax_a=coalesce(r19pad6a_1,
                                 r19pad6b_10),
                  tax_b=coalesce(r19pad6a_2,
                                 r19pad6b_9),
                  tax_c=coalesce(r19pad6a_3,
                                 r19pad6b_8),
                  tax_d=coalesce(r19pad6a_4,
                                 r19pad6b_7),
                  tax_e=coalesce(r19pad6a_5,
                                 r19pad6b_6),
                  tax_f=coalesce(r19pad6a_6,
                                 r19pad6b_5),
                  tax_g=coalesce(r19pad6a_7,
                                 r19pad6b_4),
                  tax_h=coalesce(r19pad6a_8,
                                 r19pad6b_3),
                  tax_i=coalesce(r19pad6a_9,
                                 r19pad6b_2),
                  tax_j=coalesce(r19pad6a_10,
                                 r19pad6b_1))
# Tax knowledge
tax_tab <- reg_data %>% dplyr::select(predicted)
tax_tab$knowledge_amount <- row.names(tax_tab)
d$knowledge_amount <- d$r19pad7_ran
d <- merge(d,tax_tab,by="knowledge_amount",all.x = T)
d$guess_group <- factor(d$r19pad7_ran,labels=c("$11,000","$28,000","$55,000","$83,000","$110,000","$220,000","$550,000",
                                               "$1,100,000","$5,500,000","$11,000,000"))
d$guess <- d$r19pad7_1
d$knowledge_nonabs <- d$r19pad7_1 - d$predicted
d$knowledge <- abs(d$knowledge_nonabs)
d$knowledge_dec <- car::recode(ntile(-d$knowledge,5),"1:4=0;5=1;else=NA")
d$progressivity <- d$tax_j-d$tax_a
# Background
d$resp_inc_old<-car::recode(d$r19pad8,"97:98=NA")
d$resp_inc<-as.factor(ntile(as.numeric(d$resp_inc_old),10))
d$edu_big <- as.factor(as.numeric(car::recode(d$r19P4_2,"12:97=NA")))
d$edu_d<-as.factor(ntile(as.numeric(d$edu_big),10))
d$occupation <- as.factor(car::recode(d$c18bk21,"97:98=NA;7=NA"))
d$age <- factor(d$r19P5_1)
d$gender <- factor(d$r19P1)
d$region <- factor(d$r19P2)
# Percentile income variable
d1 <- data.frame(table(d$resp_inc_old))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1 <- d1 %>% dplyr::rename(resp_inc_old=Var1,resp_inc_percentile=score) %>% dplyr::select(resp_inc_old,resp_inc_percentile)
d <- merge(d,d1,by="resp_inc_old",all.x=T)
# Party ad ideology
d$party<- car::recode(d$r19pk204,"1='Christ. Dem. Party'; 2='Conservative Party'; 3='Progress Party'; 4='Liberal Party'; 5='Soc. Left Party'; 6='Centre Party'; 7='Green Party'; 8='Labor Party'; 9='Red Party';else=NA")
d$party <- factor(d$party, levels(factor(d$party))[c(8,4,9,5,1,2,3,7,6)])
d$party_cat <- as.factor(car::recode(as.numeric(d$r19pk204),"5=1; 8:9=1; 1=2; 4=2; 6:7=2;  2:3=3; else=NA "))
ncp <- d
# Stacked dataset
dm <- d %>% gather(amount,tax,tax_a:tax_j)
tax_tab <- reg_data %>% dplyr::select(predicted) %>% mutate(mean2=predicted,predicted=NULL)
tax_tab$amount <- c("tax_a","tax_b","tax_c","tax_d","tax_e",
                    "tax_f","tax_g","tax_h","tax_i","tax_j")
tax_tab$amount_lab <- factor(c("$11,000",
                               "$28,000",
                               "$55,000",
                               "$83,000",
                               "$110,000",
                               "$220,000",
                               "$550,000",
                               "$1,100,000",
                               "$5,500,000",
                               "$11,000,000"),
                             levels=c("$11,000",
                                      "$28,000",
                                      "$55,000",
                                      "$83,000",
                                      "$110,000",
                                      "$220,000",
                                      "$550,000",
                                      "$1,100,000",
                                      "$5,500,000",
                                      "$11,000,000"))
tax_tab$amount_num <- c(100000,250000,500000,750000,1000000,
                        2000000,5000000,10000000,50000000,100000000)
tax_tab1 <- tax_tab %>% 
  mutate(group=amount_lab,
         predicted=mean2,
         x="Average effective tax rates (2018)") %>%
  dplyr::select(group,predicted,x)
dm <- merge(dm,tax_tab,by="amount",all.x = T)
dm$deviation_nonabs <- dm$tax - dm$mean2
dm$deviation <- abs(dm$deviation_nonabs)
dm$deviation_dic <- ifelse(dm$deviation_nonabs>0,1,ifelse(dm$deviation_nonabs<0,0,NA))
dm$deviation_dic_5 <- ifelse(dm$deviation_nonabs>=5,1,ifelse(dm$deviation_nonabs<5,0,NA))
dm$deviation_dic_10 <- ifelse(dm$deviation_nonabs>=10,1,ifelse(dm$deviation_nonabs<10,0,NA))
dm$deviation_dic_15 <- ifelse(dm$deviation_nonabs>=15,1,ifelse(dm$deviation_nonabs<15,0,NA))


# Table 1: Effective tax rates for different incomes in 2018
tax  <- read_excel("Statistics Norway tax rates, for Study 2.xlsx")
tax2 <- tax %>% dplyr::select(amount_dollar,
                              dist_place,
                              intervall_dollar,
                              SSB_antall,
                              rate_nowealth) %>% 
  mutate(rate_nowealth=round(rate_nowealth*100,1))
kable(tax2,booktabs=T, 
      align=c("rlrrc"),
      linesep = '',
      caption="Effective tax rates for different incomes in 2018",
      col.names = c("Target annual income (USD)","Approx. point in the income distribution","Interval around target income (USD)","N taxpayers in interval","Income tax as a share of gross income (average %)")) %>%
  column_spec(1:5,width="1in") %>%
  footnote("Source: Registry data for Norwegian tax payers for the year 2018. The data were compiled by Statistics Norway in January 2021. Income tax is calculated as the sum of all types of income tax paid to municipality, county, and state, as well as National Insurance Scheme members' contributions (details in Appendix). Locations in the income distribution are from the World Inequality Database (https://wid.world/).",threeparttable = T)

# Table 2: Actual and preferred income tax rates by size of income
funn<-function(x){
  d1 <- dm %>% filter(amount_lab==x)
  m1 <- lm(tax~tax,data=d1,weights=r19Weight2)
  predicted <- summary(m1)$coeff[1,1]
  se <- summary(m1)$coeff[1,2]
  dat <- data.frame(predicted,se)
  dat$group <- x
  dat
}
p<-do.call(rbind,(lapply(unique(dm$amount_lab),funn)))
funn<-function(x){
  d1 <- dm %>% filter(amount_lab==x)
  m1 <- lm(deviation_nonabs~deviation_nonabs,data=d1,weights=r19Weight2)
  predicted <- summary(m1)$coeff[1,1]
  se <- summary(m1)$coeff[1,2]
  dat <- data.frame(predicted,se)
  dat$group <- x
  dat
}
p1<-do.call(rbind,(lapply(unique(dm$amount_lab),funn)))
p2 <- tax_tab1 %>% mutate(x=row.names(tax_tab1)) %>% dplyr::select(x,predicted)
tab <- data.frame(usd=p$group,
                  off=sprintf("%.1f",round(p2$predicted,1)),
                  pref=paste0(sprintf("%.1f",round(p$predicted,1))," (",sprintf("%.2f",round(p$se,2)),")"),
                  diff=paste0(ifelse(p1$predicted>0,paste0("+",sprintf("%.1f",round(p1$predicted,1))),sprintf("%.1f",round(p1$predicted,1)))," (",sprintf("%.2f",round(p1$se,2)),")"))
dists <- c("r","c","c","c")
kable(tab,booktabs=T, 
      align=dists,
      linesep = '',
      caption="Actual and preferred income tax rates by size of income",
      col.names = c("Annual taxable income", "Average effective income tax rate in 2018 (%)","Average preferred rate","Difference from actual rate")) %>%
  pack_rows("Bottom 99 percent",1,6) %>%
  pack_rows("Top 1 percent",7,10) %>%
  add_header_above(c(" "=2,"Estimates from survey %
                     (standard errors in parentheses)"=2)) %>%
  column_spec(1,width="1in") %>%
  column_spec(2,width="1.2in") %>%
  column_spec(3,width="1in") %>%
  column_spec(4,width="1.1in") %>%
  footnote("Survey results are from NCP Round 19 (2020). Each respondent was asked about preferred rates for all ten income sizes, resulting in a total of 17,209 observations given by 1,754 respondents. Actual tax rates are based on registry data for Norwegian tax payers provided by Statistics Norway.",threeparttable = T) %>%
  kable_styling()

# Figure 3: Preferred vs. actual tax rates
funn<-function(x){
  d1 <- dm %>% filter(amount_lab==x)
  m1 <- lm(tax~tax,data=d1,weights=r19Weight2)
  predicted <- summary(m1)$coeff[1,1]
  se <- summary(m1)$coeff[1,2]
  dat <- data.frame(predicted,se)
  dat$group <- x
  dat
}
p<-do.call(rbind,(lapply(unique(dm$amount_lab),funn)))
p$conf.high <- p$predicted + p$se*1.96
p$conf.low <- p$predicted - p$se*1.96
p$x <- "Average preferred"
p1 <- rbind.fill(p,tax_tab1)
m <- lm(guess~guess_group,weight=r19Weight2,data=d)
p2 <- sjlabelled::as_label(ggemmeans(m,terms=c("guess_group")))
p2 <- p2[,-6] %>% 
  dplyr::rename(group=x,
                se=std.error) %>%
  mutate(x="Guess")
p1 <- rbind(p1,p2)
p1$x <- factor(p1$x,levels(factor(p1$x))[c(2,3,1)])
p1 <- arrange(p1,p1$x) # To get the ideal order of layers
p<-ggplot(p1%>%filter(x!="Guess"),aes(x=group,
                                      y=predicted,
                                      color=x,
                                      shape=x,
                                      group=x)) +
  theme_m() +
  geom_line(size=1) +
  geom_line(aes(y=conf.low),size=0.3) +
  geom_line(aes(y=conf.high),size=0.3) +
  geom_point(size=3,fill="white") +
  scale_shape_manual(values=c(17,16)) +
  scale_y_continuous(breaks=seq(0,60,10),labels=function(x)paste0(x,"%")) + # NOTE: Do not include limits, ruins extra x-asis brackets using coord cartesion below
  theme(panel.grid.major = element_line(color="grey95"),
        legend.position = "none",
        axis.text.x = element_text(angle=45, hjust=1)) +
  labs(x=NULL,y="Tax rate",color=NULL,
       shape=NULL) +
  scale_color_manual(values=c(pal_r[c(1)],"black")) +
  annotate("label",x=6.3,y=51,label="Citizens' preferred tax rates",label.size=NA,size=3.5) +
  annotate("label",x=8,y=16,label="Actual effective\ntax rates",label.size=NA,size=3.5) +
  geom_segment(aes(x = 6.3, y = 48, xend = 6.6, yend = 43),
               arrow = arrow(length=unit(0.2,"cm"),type = "closed"),size=0.2,color="black") +
  geom_segment(aes(x = 7.9, y = 20, xend = 8.5, yend = 27),
               arrow = arrow(length=unit(0.2,"cm"),type = "closed"),size=0.2,color="black")
col <- "black"
l1 <- -18
l2 <- -22
p + coord_cartesian(ylim = c(0,55), clip = "off") + # Make sure inside of plot is not changed
  theme(plot.margin=unit(c(0.5,0.5,1.7,0.5),"cm")) + # Make room
  annotate("text", x = 2.85, y = l2-2.5, label = "Bottom 99 percent",color=col) +
  geom_segment(x=0.4,y=l2,xend=5.5,yend=l2,color=col,size=0.3) +
  annotate("text", x = 8.2, y = l2-2.5, label = "Top 1 percent",color=col) +
  geom_segment(x=6,y=l2,xend=10.5,yend=l2,color=col,size=0.3) +
  annotate("text", x = 5.5, y = l2-8, label = "Annual taxable income")

# Figure 4: Preferred vs. actual tax rates for six groups of respondents
## Income
m<-lm(tax~amount_lab*resp_inc,weights=r19Weight2,data=dm)
pred_inc <- data.frame(ggemmeans(m, terms = c("resp_inc [1,10]","amount_lab")))
pred_inc$x <- car::recode(pred_inc$x,"1='Low income'; 10='High income'")
## Education
m<-lm(tax~amount_lab*edu_d,weights=r19Weight2,data=dm)
pred_edu <- data.frame(ggemmeans(m, terms = c("edu_d [1,10]","amount_lab")))
pred_edu$x <- car::recode(pred_edu$x,"1='Low education'; 10='High education'")
## Party
m<-lm(tax~amount_lab*party_cat,weights=r19Weight2,data=dm)
pred_party <- data.frame(ggemmeans(m, terms = c("party_cat [1,3]","amount_lab")))
pred_party$x <- car::recode(pred_party$x,"1='Left-wing'; 3='Right-wing'")
## Combine
tax_tab2<-tax_tab1
tax_tab2$x[tax_tab2$x=="Average effective tax rates (2018)"] <- "Actual tax rates"
pred_all <- rbind.fill(pred_inc,pred_edu,pred_party,tax_tab2)
pred_all$x <- factor(pred_all$x, levels(factor(pred_all$x))[c(2,4,6,5,3,7,1)])
## Plot
p<-ggplot(pred_all,aes(x=group,y=predicted,color=x,shape=x,group=x)) +
  theme_m() +
  geom_line(size=0.8) +
  scale_shape_manual(values = c(shapes[2:7],16)) +
  geom_point(size=2,fill="white") +
  scale_y_continuous(breaks=seq(0,60,10),labels=function(x) paste0(x,"%")) +
  theme(legend.position = "right",
        axis.text.x = element_text(angle=45, hjust=1),
        text = element_text(color="black"),
        axis.text = element_text(color="black"),
        legend.text = element_text(margin = margin(t=4,b=4))) +
  labs(x=NULL,y="Tax rate",color=NULL,shape=NULL) +
  scale_color_manual(values=c(pal_r[c(1:6)],"black"))
col <- "black"
l1 <- -17
l2 <- -22
p + coord_cartesian(ylim = c(0,60), clip = "off") + # Make sure inside of plot is not changed
  theme(plot.margin=unit(c(0.5,0.5,1.5,0.5),"cm")) + # Make room
  annotate("text", x = 2.85, y = l2-2.5, label = "Bottom 99 percent",color=col) +
  geom_segment(x=0.4,y=l2,xend=5.5,yend=l2,color=col,size=0.3) +
  annotate("text", x = 8.2, y = l2-2.5, label = "Top 1 percent",color=col) +
  geom_segment(x=6,y=l2,xend=10.5,yend=l2,color=col,size=0.3) +
  annotate("text", x = 5.5, y = l2-8, label = "Annual taxable income")


################# STUDY 3 ################# 

# Load data
yg <- read_sav("YouGov survey, for Study 3.sav")
# Data prep
## Labor income
yg$tax_a_l <- coalesce(yg$UB1_1_1, yg$UB4_1_1)
yg$tax_b_l <- coalesce(yg$UB1_2_1, yg$UB4_2_1)
yg$tax_c_l <- coalesce(yg$UB1_3_1, yg$UB4_3_1)
yg$tax_d_l <- coalesce(yg$UB1_4_1, yg$UB4_4_1)
yg$tax_e_l <- coalesce(yg$UB1_5_1, yg$UB4_5_1)
yg$tax_f_l <- coalesce(yg$UB1_6_1, yg$UB4_6_1)
yg$tax_g_l <- coalesce(yg$UB1_7_1, yg$UB4_7_1)
yg$tax_h_l <- coalesce(yg$UB1_8_1, yg$UB4_8_1)
yg$tax_i_l <- coalesce(yg$UB1_9_1, yg$UB4_9_1)
yg$tax_j_l <- coalesce(yg$UB1_10_1, yg$UB4_10_1)
## Capital income
yg$tax_a_c <- coalesce(yg$UB2_1_1, yg$UB3_1_1)
yg$tax_b_c <- coalesce(yg$UB2_2_1, yg$UB3_2_1)
yg$tax_c_c <- coalesce(yg$UB2_3_1, yg$UB3_3_1)
yg$tax_d_c <- coalesce(yg$UB2_4_1, yg$UB3_4_1)
yg$tax_e_c <- coalesce(yg$UB2_5_1, yg$UB3_5_1)
yg$tax_f_c <- coalesce(yg$UB2_6_1, yg$UB3_6_1)
yg$tax_g_c <- coalesce(yg$UB2_7_1, yg$UB3_7_1)
yg$tax_h_c <- coalesce(yg$UB2_8_1, yg$UB3_8_1)
yg$tax_i_c <- coalesce(yg$UB2_9_1, yg$UB3_9_1)
yg$tax_j_c <- coalesce(yg$UB2_10_1, yg$UB3_10_1)
## Combine and recode
yg_long <- yg %>% gather(var, val,tax_a_l:tax_j_c)
yg_long$source <- str_sub(yg_long$var,-1,-1)
yg_long$amount <- str_sub(yg_long$var,1,5)
yg_long$amount <- factor(yg_long$amount, 
                         labels=c("$11,000","$28,000","$55,000","$83,000","$110,000","$220,000","$550,000","$1,100,000","$5,500,000","$11,000,000"))
yg_long$source <- factor(yg_long$source, labels=c("Capital income",
                                                  "Labor income"))
yg_long$income <- yg_long$household_income_rc
yg_long$party <- factor(car::recode(yg_long$FT_next, "10:hi=NA"))

# Figure 5: Preferred and actual tax rates for labor income and capital income
d1 <- yg_long %>%
  group_by(source,amount) %>%
  dplyr::summarise(mean=weighted.mean(val,w=weight,na.rm=T),
                   se=sqrt(var(val)/length(val)))
d1$conf.high <- d1$mean + d1$se*1.96
d1$conf.low <- d1$mean - d1$se*1.96
tax_off <- read_excel("Tax rates, Skatteetaten, for Study 3.xlsx") %>% 
  dplyr::select(amount_num,labor,capital_low,capital_high)
tax_off$amount <- factor(tax_off$amount_num,labels=c("$11,000","$28,000","$55,000","$83,000","$110,000","$220,000","$550,000","$1,100,000","$5,500,000","$11,000,000"))
## Labor plot
d1_l <- merge(d1%>%filter(source=="Labor income"),
              tax_off%>%dplyr::select(amount,labor),
              by=c("amount")) %>%
  gather(var,val,c(mean,labor))
d1_l$var <- car::recode(d1_l$var,"'mean'='Average preferred tax rate on labor income';
                        'labor'='Actual tax rate on labor income'")
d1_l$var<-factor(d1_l$var,levels(factor(d1_l$var))[c(2,1)])
d1_l <- arrange(d1_l,d1_l$var) # To get the ideal order of layers
p1<-ggplot(d1_l,aes(amount,val)) +
  theme_m() +
  geom_line(aes(linetype=var,color=var,group=var),size=1) +
  geom_line(aes(y=conf.low,group=1),size=0.3,color=pal_r[11]) +
  geom_line(aes(y=conf.high,group=1),size=0.3,color=pal_r[11]) +  
  geom_point(aes(shape=var,color=var),size=2.3) +
  scale_shape_manual(values=c(16,15)) +
  scale_linetype_manual(values=c("solid","solid")) +
  scale_color_manual(values=c(pal_r[c(11,8)])) +
  scale_y_continuous(breaks=seq(0,60,10),
                     limits=c(0,50),labels=function(x)paste0(x,"%")) + # NOTE: Do not include limits, ruins extra x-asis brackets using coord cartesion below
  theme(panel.grid.major = element_line(color="grey95"),
        legend.position = "bottom",
        axis.text.x = element_text(angle=45, hjust=1),
        text = element_text(size=12),
        plot.margin=unit(c(0.3,0.5,0.5,0.3),"cm")) +
  guides(color=guide_legend(ncol=1,reverse=T),
         linetype=guide_legend(ncol=1,reverse=T),
         shape=guide_legend(ncol=1,reverse=T)) +
  labs(x=NULL,
       y="Tax rate",
       color=NULL,linetype=NULL,shape=NULL,
       title="Labor income")
## Capital plot
d1_c <- merge(d1%>%filter(source=="Capital income"),
              tax_off%>%dplyr::select(amount,capital_high,capital_low),
              by=c("amount")) %>%
  gather(var,val,c(mean,capital_high,capital_low)) 
d1_c$var <- car::recode(d1_c$var,"'mean'='Average preferred tax rate on capital income';
                        'capital_high'='Actual tax rate on capital income (gains/dividends)';'capital_low'='Actual tax rate on capital income (interest/rents)'")
d1_c$var<-factor(d1_c$var,levels(factor(d1_c$var))[c(2,1,3)])
d1_c <- arrange(d1_c,d1_c$var) # To get the ideal order of layers
p2<-ggplot(d1_c,aes(amount,val)) +
  theme_m() + 
  geom_line(aes(linetype=var,color=var,group=var),size=1) +
  geom_line(aes(y=conf.low,group=1),size=0.3,color=pal_r[7]) +
  geom_line(aes(y=conf.high,group=1),size=0.3,color=pal_r[7]) +  
  geom_point(aes(shape=var,color=var),size=2.3) +
  scale_shape_manual(values=c(15,17,16)) +
  scale_linetype_manual(values=c("solid","solid","solid")) +
  scale_color_manual(values=c(pal_r[c(4,10,7)])) +
  scale_y_continuous(breaks=seq(0,60,10),
                     limits=c(0,50),labels=function(x)paste0(x,"%")) + # NOTE: Do not include limits, ruins extra x-asis brackets using coord cartesion below
  theme(panel.grid.major = element_line(color="grey95"),
        legend.position = "bottom",
        axis.text.x = element_text(angle=45, hjust=1),
        text = element_text(size=12),
        plot.margin=unit(c(0.3,1,0.3,0.3),"cm")) +
  guides(color=guide_legend(ncol=1,reverse=T),
         linetype=guide_legend(ncol=1,reverse=T),
         shape=guide_legend(ncol=1,reverse=T)) +
  labs(x=NULL,
       y=NULL,
       color=NULL,linetype=NULL,shape=NULL,
       title="Capital income")
## Combine
grid.arrange(p1,p2,ncol=2)




